BST 260 Final Project Group: Chuying Ma, Jingjing Tang, Jie Yin, Xuewei Zhang
This Rmd file is for studying the relationship between other variables and mental health:
Mental health patterns in 2000 for all US counties in a map:
library(maps)
library(RColorBrewer)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
data_2000 = read.csv("data_2000.csv")
ment_2000 <- data_2000 %>%
dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_00 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_00 <- inner_join(men_map_00, ment_2000, by=c("fips" = "fips") )
map_00 = ggplot(men_df_00, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_00
mental health patterns in 2001 for all US counties in a map:
data_2001 = read.csv("data_2001.csv")
ment_2001 <- data_2001 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_01 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_01 <- inner_join(men_map_01, ment_2001, by=c("fips" = "fips") )
map_01 = ggplot(men_df_01, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_01
mental health patterns in 2002 for all US counties in a map:
data_2002 = read.csv("data_2002.csv")
ment_2002 <- data_2002 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_02 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_02 <- inner_join(men_map_02, ment_2002, by=c("fips" = "fips") )
map_02 = ggplot(men_df_02, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_02
mental health patterns in 2003 for all US counties in a map:
data_2003 = read.csv("data_2003.csv")
ment_2003 <- data_2003 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_03 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_03 <- inner_join(men_map_03, ment_2003, by=c("fips" = "fips") )
map_03 = ggplot(men_df_03, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_03
mental health patterns in 2004 for all US counties in a map:
data_2004 = read.csv("data_2004.csv")
ment_2004 <- data_2004 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_04 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_04 <- inner_join(men_map_04, ment_2004, by=c("fips" = "fips") )
map_04 = ggplot(men_df_04, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_04
mental health patterns in 2005 for all US counties in a map:
data_2005 = read.csv("data_2005.csv")
ment_2005 <- data_2005 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_05 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_05 <- inner_join(men_map_05, ment_2005, by=c("fips" = "fips"))
map_05 = ggplot(men_df_05, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_05
mental health patterns in 2006 for all US counties in a map:
data_2006 = read.csv("data_2006.csv")
ment_2006 <- data_2006 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_06 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_06 <- inner_join(men_map_06, ment_2006, by=c("fips" = "fips"))
map_06 = ggplot(men_df_06, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_06
mental health patterns in 2007 for all US counties in a map:
data_2007 = read.csv("data_2007.csv")
ment_2007 <- data_2007 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_07 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_07 <- inner_join(men_map_07, ment_2007, by=c("fips" = "fips"))
map_07 = ggplot(men_df_07, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_07
mental health patterns in 2008 for all US counties in a map:
data_2008 = read.csv("2008_data.csv")
ment_2008 <- data_2008 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_08 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_08 <- inner_join(men_map_08, ment_2008, by=c("fips" = "fips"))
map_08 = ggplot(men_df_08, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_08
mental health patterns in 2009 for all US counties in a map:
data_2009 = read.csv("2009_data.csv")
ment_2009 <- data_2009 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_09 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_09 <- inner_join(men_map_09, ment_2009, by=c("fips" = "fips"))
map_09 = ggplot(men_df_09, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_09
mental health patterns in 2010 for all US counties in a map:
data_2010 = read.csv("2010_data.csv")
ment_2010 <- data_2010 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_10 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_10 <- inner_join(men_map_10, ment_2010, by=c("fips" = "fips"))
map_10 = ggplot(men_df_10, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_10
map in the same plot using facet:
mental_data = data.frame()
mental_data = rbind(data_2000,data_2001,data_2002)
mental_data = rbind(mental_data,data_2003,data_2004)
mental_data = rbind(mental_data,data_2005,data_2006)
mental_data = rbind(mental_data,data_2007,data_2008,data_2009,data_2010)
write_csv(mental_data,"10_year_data.csv")
mental_data = read_csv("10_year_data.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer(),
## year = col_integer()
## )
## See spec(...) for full column specifications.
ment <- mental_data %>% dplyr::select(fips,menthlth,year)
cnty <- map_data("county")
men_map <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df <- inner_join(men_map, ment, by=c("fips" = "fips"))
map_all_year = ggplot(men_df, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt") +
facet_wrap(~year,ncol = 3) +
ggtitle("Number of Days Mental Health Not Good in US counties from 2000 to 2010")
map_all_year
write state level data for exposure:
combine with pm2.5 and ozone:
pm2.5 = read_csv("PM25_Ozone_county_2000_2012.csv")
pm2.5$COUNTY = as.numeric(pm2.5$COUNTY)
pm2.5_00 = pm2.5 %>%
select(COUNTY,PM25_2000,Ozone_2000)
data_2000 = data_2000 %>%
left_join(pm2.5_00,by = c("fips" = "COUNTY"))
colnames(data_2000)[48] = "pm2.5"
colnames(data_2000)[49] = "ozone"
write_csv(data_2000,"data_00_expo.csv")
pm2.5_01 = pm2.5 %>%
select(COUNTY,PM25_2001,Ozone_2001)
data_2001 = data_2001 %>%
left_join(pm2.5_01,by = c("fips" = "COUNTY"))
colnames(data_2001)[48] = "pm2.5"
colnames(data_2001)[49] = "ozone"
write_csv(data_2001,"data_01_expo.csv")
pm2.5_02 = pm2.5 %>%
select(COUNTY,PM25_2002,Ozone_2002)
data_2002 = data_2002 %>%
left_join(pm2.5_02,by = c("fips" = "COUNTY"))
colnames(data_2002)[48] = "pm2.5"
colnames(data_2002)[49] = "ozone"
write_csv(data_2002,"data_02_expo.csv")
pm2.5_03 = pm2.5 %>%
select(COUNTY,PM25_2003,Ozone_2003)
data_2003 = data_2003 %>%
left_join(pm2.5_03,by = c("fips" = "COUNTY"))
colnames(data_2003)[48] = "pm2.5"
colnames(data_2003)[49] = "ozone"
write_csv(data_2003,"data_03_expo.csv")
pm2.5_04 = pm2.5 %>%
select(COUNTY,PM25_2004,Ozone_2004)
data_2004 = data_2004 %>%
left_join(pm2.5_04,by = c("fips" = "COUNTY"))
colnames(data_2004)[48] = "pm2.5"
colnames(data_2004)[49] = "ozone"
write_csv(data_2004,"data_04_expo.csv")
pm2.5_05 = pm2.5 %>%
select(COUNTY,PM25_2005,Ozone_2005)
data_2005 = data_2005 %>%
left_join(pm2.5_05,by = c("fips" = "COUNTY"))
colnames(data_2005)[48] = "pm2.5"
colnames(data_2005)[49] = "ozone"
write_csv(data_2005,"data_05_expo.csv")
pm2.5_06 = pm2.5 %>%
select(COUNTY,PM25_2006,Ozone_2006)
data_2006 = data_2006 %>%
left_join(pm2.5_06,by = c("fips" = "COUNTY"))
colnames(data_2006)[48] = "pm2.5"
colnames(data_2006)[49] = "ozone"
write_csv(data_2006,"data_06_expo.csv")
pm2.5_07 = pm2.5 %>%
select(COUNTY,PM25_2007,Ozone_2007)
data_2007 = data_2007 %>%
left_join(pm2.5_07,by = c("fips" = "COUNTY"))
colnames(data_2007)[48] = "pm2.5"
colnames(data_2007)[49] = "ozone"
write_csv(data_2007,"data_07_expo.csv")
pm2.5_08 = pm2.5 %>%
select(COUNTY,PM25_2008,Ozone_2008)
data_2008 = data_2008 %>%
left_join(pm2.5_08,by = c("fips" = "COUNTY"))
colnames(data_2008)[48] = "pm2.5"
colnames(data_2008)[49] = "ozone"
write_csv(data_2008,"data_08_expo.csv")
pm2.5_09 = pm2.5 %>%
select(COUNTY,PM25_2009,Ozone_2009)
data_2009 = data_2009 %>%
left_join(pm2.5_09,by = c("fips" = "COUNTY"))
colnames(data_2009)[48] = "pm2.5"
colnames(data_2009)[49] = "ozone"
write_csv(data_2009,"data_09_expo.csv")
pm2.5_10 = pm2.5 %>%
select(COUNTY,PM25_2010,Ozone_2010)
data_2010 = data_2010 %>%
left_join(pm2.5_10,by = c("fips" = "COUNTY"))
colnames(data_2010)[48] = "pm2.5"
colnames(data_2010)[49] = "ozone"
write_csv(data_2010,"data_10_expo.csv")
combine all data to write a csv file for future use:
all_expo = rbind(data_2000,data_2001,data_2002,data_2003,data_2004,data_2005,data_2006,data_2007,data_2008,data_2009,data_2010)
write_csv(all_expo,"data_exposure_00_10.csv")
run linear regression for all covariates:
library(readr)
library(dplyr)
data_all = read_csv("data_3_expo.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer()
## )
## See spec(...) for full column specifications.
reg_data = data_all %>%
dplyr::select(-c(fips,state,county,year,heartattack,ACheartdis,stroke,asthma))
full_men_model = lm(menthlth ~ .,data = reg_data)
summary(full_men_model)
##
## Call:
## lm(formula = menthlth ~ ., data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2722 -1.0666 -0.0411 0.9740 21.1965
##
## Coefficients: (6 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.322e+02 3.610e+02 -0.366 0.714149
## age 2.678e-03 1.944e-02 0.138 0.890415
## sex 1.182e+00 6.650e-01 1.777 0.075624 .
## white -2.932e-02 1.384e-01 -0.212 0.832197
## black 9.434e-02 1.912e-01 0.493 0.621805
## asian -5.513e-01 4.068e-01 -1.355 0.175451
## race_other NA NA NA NA
## educ1 1.030e+00 1.340e+00 0.769 0.442048
## educ2 1.552e+00 5.739e-01 2.704 0.006884 **
## educ3 NA NA NA NA
## income1 -7.363e+00 1.303e+00 -5.651 1.71e-08 ***
## income2 -1.078e-01 1.194e+00 -0.090 0.928069
## income3 -2.024e-01 1.115e+00 -0.182 0.855901
## income4 -2.392e+00 9.302e-01 -2.572 0.010156 *
## income5 -5.853e-01 8.726e-01 -0.671 0.502461
## income6 -1.296e+00 8.307e-01 -1.561 0.118713
## income7 -8.315e-01 8.679e-01 -0.958 0.338087
## income8 NA NA NA NA
## married -1.362e+00 5.790e-01 -2.352 0.018719 *
## unmarried NA NA NA NA
## employed -3.149e+00 8.035e-01 -3.919 9.04e-05 ***
## out_of_work 5.473e+00 1.451e+00 3.773 0.000164 ***
## homemaker -2.828e+00 1.183e+00 -2.390 0.016897 *
## student -3.301e+00 2.666e+00 -1.238 0.215814
## employ_other NA NA NA NA
## hlthplan -3.380e+00 8.761e-01 -3.859 0.000116 ***
## exercise -1.243e+00 6.962e-01 -1.786 0.074257 .
## smoke 4.265e+00 7.806e-01 5.463 4.98e-08 ***
## drink -3.131e+00 4.488e-01 -6.977 3.55e-12 ***
## bmi_cat1 -1.759e+00 1.466e+00 -1.200 0.230116
## bmi_cat2 -1.531e+00 1.087e+00 -1.408 0.159127
## bmi_cat3 NA NA NA NA
## bmi_cts -8.387e-02 9.851e-02 -0.851 0.394641
## genhlth_ex 1.514e+02 3.610e+02 0.419 0.675007
## genhlth_vg 1.489e+02 3.610e+02 0.413 0.679974
## genhlth_go 1.498e+02 3.609e+02 0.415 0.678214
## genhlth_fa 1.515e+02 3.609e+02 0.420 0.674751
## genhlth_po 1.627e+02 3.609e+02 0.451 0.652257
## qlactlm 4.341e+00 7.357e-01 5.901 3.94e-09 ***
## pm2.5 -8.277e-04 1.906e-02 -0.043 0.965356
## ozone 1.394e+01 7.910e+00 1.763 0.078051 .
## greenness 1.469e+00 3.530e-01 4.162 3.22e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.192 on 3715 degrees of freedom
## (13135 observations deleted due to missingness)
## Multiple R-squared: 0.3877, Adjusted R-squared: 0.382
## F-statistic: 67.22 on 35 and 3715 DF, p-value: < 2.2e-16
delete income2:
mod1 = lm(menthlth ~ age + sex + white + black + asian + educ1 + educ2 + income1 + income3 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod1)
##
## Call:
## lm(formula = menthlth ~ age + sex + white + black + asian + educ1 +
## educ2 + income1 + income3 + income4 + income5 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + bmi_cat1 +
## bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2817 -1.0664 -0.0413 0.9675 21.1811
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.340e+02 3.594e+02 -0.373 0.709296
## age 2.257e-03 1.918e-02 0.118 0.906355
## sex 1.154e+00 6.607e-01 1.747 0.080694 .
## white -2.902e-02 1.376e-01 -0.211 0.832967
## black 9.504e-02 1.897e-01 0.501 0.616401
## asian -5.149e-01 3.950e-01 -1.303 0.192488
## educ1 9.588e-01 1.313e+00 0.730 0.465443
## educ2 1.490e+00 5.509e-01 2.705 0.006869 **
## income1 -7.343e+00 1.267e+00 -5.794 7.45e-09 ***
## income3 -1.800e-01 1.071e+00 -0.168 0.866523
## income4 -2.345e+00 8.879e-01 -2.641 0.008306 **
## income5 -5.430e-01 8.462e-01 -0.642 0.521096
## income6 -1.293e+00 8.018e-01 -1.613 0.106793
## income7 -8.431e-01 8.409e-01 -1.003 0.316118
## married -1.348e+00 5.555e-01 -2.427 0.015278 *
## employed -3.133e+00 7.870e-01 -3.982 6.97e-05 ***
## out_of_work 5.417e+00 1.440e+00 3.762 0.000171 ***
## homemaker -2.852e+00 1.173e+00 -2.432 0.015074 *
## student -3.242e+00 2.647e+00 -1.225 0.220729
## hlthplan -3.399e+00 8.625e-01 -3.941 8.25e-05 ***
## exercise -1.286e+00 6.911e-01 -1.861 0.062830 .
## smoke 4.293e+00 7.734e-01 5.550 3.05e-08 ***
## drink -3.153e+00 4.426e-01 -7.123 1.26e-12 ***
## bmi_cat1 -1.765e+00 1.456e+00 -1.212 0.225573
## bmi_cat2 -1.520e+00 1.080e+00 -1.407 0.159405
## bmi_cts -8.627e-02 9.788e-02 -0.881 0.378169
## genhlth_ex 1.533e+02 3.593e+02 0.427 0.669732
## genhlth_vg 1.508e+02 3.593e+02 0.420 0.674759
## genhlth_go 1.516e+02 3.593e+02 0.422 0.673026
## genhlth_fa 1.534e+02 3.593e+02 0.427 0.669538
## genhlth_po 1.646e+02 3.593e+02 0.458 0.647021
## qlactlm 4.286e+00 7.286e-01 5.882 4.42e-09 ***
## pm2.5 8.248e-04 1.876e-02 0.044 0.964932
## ozone 1.383e+01 7.797e+00 1.774 0.076140 .
## greenness 1.491e+00 3.486e-01 4.276 1.95e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.182 on 3758 degrees of freedom
## (13093 observations deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.3847
## F-statistic: 70.73 on 34 and 3758 DF, p-value: < 2.2e-16
delete age:
mod2 = lm(menthlth ~ sex + white + black + asian + educ1 + educ2 + income1 + income3 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod2)
##
## Call:
## lm(formula = menthlth ~ sex + white + black + asian + educ1 +
## educ2 + income1 + income3 + income4 + income5 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + bmi_cat1 +
## bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2667 -1.0645 -0.0422 0.9689 21.2022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.352e+02 3.592e+02 -0.377 0.70656
## sex 1.156e+00 6.604e-01 1.751 0.08007 .
## white -2.868e-02 1.376e-01 -0.208 0.83488
## black 9.442e-02 1.896e-01 0.498 0.61851
## asian -5.182e-01 3.940e-01 -1.315 0.18859
## educ1 9.475e-01 1.310e+00 0.723 0.46945
## educ2 1.494e+00 5.500e-01 2.716 0.00665 **
## income1 -7.354e+00 1.263e+00 -5.821 6.35e-09 ***
## income3 -1.702e-01 1.068e+00 -0.159 0.87336
## income4 -2.342e+00 8.874e-01 -2.639 0.00836 **
## income5 -5.411e-01 8.459e-01 -0.640 0.52242
## income6 -1.297e+00 8.012e-01 -1.619 0.10557
## income7 -8.450e-01 8.406e-01 -1.005 0.31486
## married -1.347e+00 5.553e-01 -2.425 0.01534 *
## employed -3.195e+00 5.852e-01 -5.461 5.05e-08 ***
## out_of_work 5.358e+00 1.349e+00 3.972 7.26e-05 ***
## homemaker -2.901e+00 1.096e+00 -2.646 0.00818 **
## student -3.364e+00 2.435e+00 -1.382 0.16719
## hlthplan -3.385e+00 8.540e-01 -3.964 7.51e-05 ***
## exercise -1.292e+00 6.889e-01 -1.876 0.06075 .
## smoke 4.268e+00 7.436e-01 5.739 1.03e-08 ***
## drink -3.147e+00 4.397e-01 -7.156 9.91e-13 ***
## bmi_cat1 -1.779e+00 1.451e+00 -1.226 0.22014
## bmi_cat2 -1.528e+00 1.078e+00 -1.417 0.15650
## bmi_cts -8.779e-02 9.701e-02 -0.905 0.36556
## genhlth_ex 1.547e+02 3.591e+02 0.431 0.66657
## genhlth_vg 1.523e+02 3.591e+02 0.424 0.67157
## genhlth_go 1.531e+02 3.591e+02 0.426 0.66983
## genhlth_fa 1.548e+02 3.591e+02 0.431 0.66635
## genhlth_po 1.660e+02 3.591e+02 0.462 0.64390
## qlactlm 4.289e+00 7.278e-01 5.893 4.12e-09 ***
## pm2.5 6.537e-04 1.870e-02 0.035 0.97212
## ozone 1.376e+01 7.770e+00 1.771 0.07671 .
## greenness 1.489e+00 3.482e-01 4.275 1.96e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.182 on 3759 degrees of freedom
## (13093 observations deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.3849
## F-statistic: 72.9 on 33 and 3759 DF, p-value: < 2.2e-16
delete income3:
mod3 = lm(menthlth ~ sex + white + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod3)
##
## Call:
## lm(formula = menthlth ~ sex + white + black + asian + educ1 +
## educ2 + income1 + income4 + income5 + income6 + income7 +
## married + employed + out_of_work + homemaker + student +
## hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 +
## bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa +
## genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2744 -1.0633 -0.0421 0.9680 21.2147
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.356e+02 3.591e+02 -0.378 0.70580
## sex 1.145e+00 6.566e-01 1.744 0.08126 .
## white -2.893e-02 1.375e-01 -0.210 0.83339
## black 9.551e-02 1.895e-01 0.504 0.61420
## asian -5.173e-01 3.939e-01 -1.313 0.18925
## educ1 9.197e-01 1.298e+00 0.709 0.47861
## educ2 1.475e+00 5.371e-01 2.746 0.00606 **
## income1 -7.308e+00 1.229e+00 -5.945 3.02e-09 ***
## income4 -2.314e+00 8.702e-01 -2.659 0.00787 **
## income5 -5.186e-01 8.339e-01 -0.622 0.53407
## income6 -1.277e+00 7.909e-01 -1.614 0.10656
## income7 -8.203e-01 8.261e-01 -0.993 0.32078
## married -1.334e+00 5.490e-01 -2.429 0.01517 *
## employed -3.182e+00 5.795e-01 -5.492 4.24e-08 ***
## out_of_work 5.360e+00 1.349e+00 3.974 7.19e-05 ***
## homemaker -2.880e+00 1.088e+00 -2.646 0.00818 **
## student -3.372e+00 2.434e+00 -1.386 0.16591
## hlthplan -3.359e+00 8.375e-01 -4.010 6.18e-05 ***
## exercise -1.294e+00 6.887e-01 -1.880 0.06023 .
## smoke 4.264e+00 7.431e-01 5.737 1.04e-08 ***
## drink -3.139e+00 4.368e-01 -7.186 7.99e-13 ***
## bmi_cat1 -1.791e+00 1.449e+00 -1.236 0.21650
## bmi_cat2 -1.538e+00 1.076e+00 -1.430 0.15288
## bmi_cts -8.859e-02 9.687e-02 -0.915 0.36049
## genhlth_ex 1.550e+02 3.590e+02 0.432 0.66587
## genhlth_vg 1.526e+02 3.590e+02 0.425 0.67088
## genhlth_go 1.534e+02 3.590e+02 0.427 0.66915
## genhlth_fa 1.551e+02 3.590e+02 0.432 0.66567
## genhlth_po 1.663e+02 3.590e+02 0.463 0.64322
## qlactlm 4.283e+00 7.265e-01 5.895 4.08e-09 ***
## pm2.5 8.487e-04 1.866e-02 0.045 0.96372
## ozone 1.379e+01 7.767e+00 1.775 0.07596 .
## greenness 1.492e+00 3.477e-01 4.291 1.82e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.182 on 3760 degrees of freedom
## (13093 observations deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.385
## F-statistic: 75.19 on 32 and 3760 DF, p-value: < 2.2e-16
delete white:
mod4 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod4)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 +
## income1 + income4 + income5 + income6 + income7 + married +
## employed + out_of_work + homemaker + student + hlthplan +
## exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts +
## genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po +
## qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3060 -1.0587 -0.0427 0.9594 21.1859
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.369e+02 3.590e+02 -0.381 0.70293
## sex 1.151e+00 6.557e-01 1.755 0.07928 .
## black 1.351e-01 1.502e-01 0.899 0.36860
## asian -3.681e-01 3.662e-01 -1.005 0.31483
## educ1 9.121e-01 1.297e+00 0.703 0.48189
## educ2 1.495e+00 5.358e-01 2.791 0.00528 **
## income1 -7.303e+00 1.228e+00 -5.947 2.99e-09 ***
## income4 -2.279e+00 8.692e-01 -2.622 0.00877 **
## income5 -5.108e-01 8.319e-01 -0.614 0.53927
## income6 -1.271e+00 7.896e-01 -1.610 0.10747
## income7 -8.344e-01 8.250e-01 -1.011 0.31190
## married -1.285e+00 5.474e-01 -2.348 0.01893 *
## employed -3.170e+00 5.788e-01 -5.476 4.64e-08 ***
## out_of_work 5.329e+00 1.347e+00 3.957 7.73e-05 ***
## homemaker -2.845e+00 1.088e+00 -2.616 0.00893 **
## student -3.195e+00 2.429e+00 -1.315 0.18844
## hlthplan -3.434e+00 8.366e-01 -4.105 4.12e-05 ***
## exercise -1.324e+00 6.877e-01 -1.926 0.05423 .
## smoke 4.271e+00 7.418e-01 5.757 9.25e-09 ***
## drink -3.123e+00 4.362e-01 -7.159 9.73e-13 ***
## bmi_cat1 -1.733e+00 1.447e+00 -1.198 0.23107
## bmi_cat2 -1.517e+00 1.075e+00 -1.411 0.15830
## bmi_cts -8.471e-02 9.681e-02 -0.875 0.38165
## genhlth_ex 1.563e+02 3.589e+02 0.436 0.66322
## genhlth_vg 1.538e+02 3.589e+02 0.429 0.66830
## genhlth_go 1.546e+02 3.589e+02 0.431 0.66662
## genhlth_fa 1.564e+02 3.589e+02 0.436 0.66305
## genhlth_po 1.676e+02 3.589e+02 0.467 0.64051
## qlactlm 4.226e+00 7.248e-01 5.830 6.01e-09 ***
## pm2.5 4.473e-04 1.862e-02 0.024 0.98083
## ozone 1.365e+01 7.735e+00 1.764 0.07775 .
## greenness 1.505e+00 3.462e-01 4.349 1.41e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.181 on 3773 degrees of freedom
## (13081 observations deleted due to missingness)
## Multiple R-squared: 0.39, Adjusted R-squared: 0.385
## F-statistic: 77.81 on 31 and 3773 DF, p-value: < 2.2e-16
delete genhlth_vg:
mod5 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod5)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 +
## income1 + income4 + income5 + income6 + income7 + married +
## employed + out_of_work + homemaker + student + hlthplan +
## exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts +
## genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm +
## pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3048 -1.0580 -0.0423 0.9593 21.1851
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.9059315 3.8083300 4.439 9.29e-06 ***
## sex 1.1545614 0.6555309 1.761 0.07828 .
## black 0.1350305 0.1501968 0.899 0.36870
## asian -0.3668663 0.3661573 -1.002 0.31644
## educ1 0.9039184 1.2965802 0.697 0.48575
## educ2 1.4915840 0.5356231 2.785 0.00538 **
## income1 -7.3075294 1.2279432 -5.951 2.91e-09 ***
## income4 -2.2781783 0.8691468 -2.621 0.00880 **
## income5 -0.5170001 0.8316940 -0.622 0.53423
## income6 -1.2732501 0.7895452 -1.613 0.10691
## income7 -0.8456256 0.8245048 -1.026 0.30514
## married -1.2867348 0.5473429 -2.351 0.01878 *
## employed -3.1773746 0.5784989 -5.492 4.23e-08 ***
## out_of_work 5.3258498 1.3465454 3.955 7.79e-05 ***
## homemaker -2.8495650 1.0874395 -2.620 0.00882 **
## student -3.2042958 2.4285592 -1.319 0.18711
## hlthplan -3.4410065 0.8363564 -4.114 3.97e-05 ***
## exercise -1.3249926 0.6876680 -1.927 0.05408 .
## smoke 4.2618061 0.7414596 5.748 9.75e-09 ***
## drink -3.1209840 0.4361487 -7.156 9.95e-13 ***
## bmi_cat1 -1.7450688 1.4464325 -1.206 0.22771
## bmi_cat2 -1.5219590 1.0751466 -1.416 0.15698
## bmi_cts -0.0850834 0.0967951 -0.879 0.37945
## genhlth_ex 2.5089983 0.9950005 2.522 0.01172 *
## genhlth_go 0.8268782 0.8226417 1.005 0.31489
## genhlth_fa 2.5939973 1.0807838 2.400 0.01644 *
## genhlth_po 13.8189923 1.4273400 9.682 < 2e-16 ***
## qlactlm 4.2227559 0.7247162 5.827 6.12e-09 ***
## pm2.5 0.0003147 0.0186119 0.017 0.98651
## ozone 13.7257738 7.7322703 1.775 0.07596 .
## greenness 1.5066414 0.3461142 4.353 1.38e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.181 on 3774 degrees of freedom
## (13081 observations deleted due to missingness)
## Multiple R-squared: 0.39, Adjusted R-squared: 0.3851
## F-statistic: 80.42 on 30 and 3774 DF, p-value: < 2.2e-16
delete income5:
mod6 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod6)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 +
## income1 + income4 + income6 + income7 + married + employed +
## out_of_work + homemaker + student + hlthplan + exercise +
## smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex +
## genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2400 -1.0615 -0.0447 0.9645 21.1293
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.7970457 3.8039904 4.416 1.04e-05 ***
## sex 1.1666259 0.6551902 1.781 0.07506 .
## black 0.1392310 0.1500326 0.928 0.35346
## asian -0.3613184 0.3660188 -0.987 0.32363
## educ1 0.9163116 1.2963215 0.707 0.47970
## educ2 1.4154634 0.5213950 2.715 0.00666 **
## income1 -7.1717625 1.2082655 -5.936 3.19e-09 ***
## income4 -2.2455138 0.8674864 -2.589 0.00968 **
## income6 -1.2078350 0.7824373 -1.544 0.12275
## income7 -0.7670658 0.8146955 -0.942 0.34649
## married -1.2712945 0.5467346 -2.325 0.02011 *
## employed -3.1449712 0.5760988 -5.459 5.09e-08 ***
## out_of_work 5.3854958 1.3430132 4.010 6.19e-05 ***
## homemaker -2.8026699 1.0847314 -2.584 0.00981 **
## student -3.2040596 2.4283617 -1.319 0.18710
## hlthplan -3.3971077 0.8333019 -4.077 4.66e-05 ***
## exercise -1.3410662 0.6871259 -1.952 0.05105 .
## smoke 4.2415793 0.7406851 5.727 1.10e-08 ***
## drink -3.1214711 0.4361125 -7.157 9.83e-13 ***
## bmi_cat1 -1.7463674 1.4463134 -1.207 0.22733
## bmi_cat2 -1.5368974 1.0747906 -1.430 0.15281
## bmi_cts -0.0868244 0.0967467 -0.897 0.36954
## genhlth_ex 2.5422568 0.9934802 2.559 0.01054 *
## genhlth_go 0.8205911 0.8225127 0.998 0.31851
## genhlth_fa 2.6165495 1.0800869 2.423 0.01546 *
## genhlth_po 13.8579888 1.4258449 9.719 < 2e-16 ***
## qlactlm 4.2151283 0.7245534 5.818 6.47e-09 ***
## pm2.5 0.0006723 0.0186015 0.036 0.97117
## ozone 13.9745241 7.7212811 1.810 0.07040 .
## greenness 1.5185102 0.3455590 4.394 1.14e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3775 degrees of freedom
## (13081 observations deleted due to missingness)
## Multiple R-squared: 0.3899, Adjusted R-squared: 0.3852
## F-statistic: 83.19 on 29 and 3775 DF, p-value: < 2.2e-16
delete educ1:
mod7 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod7)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 +
## income4 + income6 + income7 + married + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1891 -1.0537 -0.0430 0.9667 21.1571
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.931884 3.792287 4.465 8.25e-06 ***
## sex 1.132782 0.653317 1.734 0.08302 .
## black 0.136321 0.149746 0.910 0.36270
## asian -0.362910 0.365669 -0.992 0.32104
## educ2 1.350864 0.509700 2.650 0.00808 **
## income1 -6.978244 1.174115 -5.943 3.04e-09 ***
## income4 -2.237425 0.865841 -2.584 0.00980 **
## income6 -1.204154 0.781912 -1.540 0.12364
## income7 -0.827323 0.811953 -1.019 0.30830
## married -1.240633 0.545939 -2.272 0.02311 *
## employed -3.129805 0.575115 -5.442 5.60e-08 ***
## out_of_work 5.397968 1.341426 4.024 5.83e-05 ***
## homemaker -2.740568 1.080720 -2.536 0.01126 *
## student -3.370241 2.422361 -1.391 0.16421
## hlthplan -3.462430 0.825121 -4.196 2.78e-05 ***
## exercise -1.382524 0.684225 -2.021 0.04339 *
## smoke 4.194612 0.736235 5.697 1.31e-08 ***
## drink -3.145698 0.434747 -7.236 5.58e-13 ***
## bmi_cat1 -1.737268 1.445301 -1.202 0.22943
## bmi_cat2 -1.518598 1.074117 -1.414 0.15750
## bmi_cts -0.087568 0.096671 -0.906 0.36508
## genhlth_ex 2.520727 0.992915 2.539 0.01117 *
## genhlth_go 0.851434 0.819769 1.039 0.29904
## genhlth_fa 2.781992 1.057813 2.630 0.00857 **
## genhlth_po 14.073221 1.390628 10.120 < 2e-16 ***
## qlactlm 4.152048 0.719144 5.774 8.38e-09 ***
## pm2.5 0.002109 0.018546 0.114 0.90948
## ozone 14.018889 7.712750 1.818 0.06920 .
## greenness 1.494825 0.344899 4.334 1.50e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3779 degrees of freedom
## (13078 observations deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.3856
## F-statistic: 86.35 on 28 and 3779 DF, p-value: < 2.2e-16
delete bmi_cts:
mod8 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod8)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 +
## income4 + income6 + income7 + married + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## bmi_cat1 + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa +
## genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2559 -1.0592 -0.0402 0.9714 21.1316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.809691 1.581582 8.732 < 2e-16 ***
## sex 1.108766 0.652763 1.699 0.08948 .
## black 0.131590 0.149651 0.879 0.37929
## asian -0.355522 0.365569 -0.973 0.33086
## educ2 1.338162 0.509495 2.626 0.00866 **
## income1 -6.991240 1.173999 -5.955 2.84e-09 ***
## income4 -2.229043 0.865771 -2.575 0.01007 *
## income6 -1.190139 0.781740 -1.522 0.12799
## income7 -0.804173 0.811532 -0.991 0.32178
## married -1.231360 0.545830 -2.256 0.02413 *
## employed -3.152500 0.574555 -5.487 4.36e-08 ***
## out_of_work 5.337033 1.339706 3.984 6.91e-05 ***
## homemaker -2.722302 1.080507 -2.519 0.01179 *
## student -3.255842 2.419009 -1.346 0.17840
## hlthplan -3.432160 0.824425 -4.163 3.21e-05 ***
## exercise -1.336410 0.682312 -1.959 0.05023 .
## smoke 4.230261 0.735165 5.754 9.40e-09 ***
## drink -3.142121 0.434718 -7.228 5.90e-13 ***
## bmi_cat1 -0.596659 0.709483 -0.841 0.40041
## bmi_cat2 -0.825977 0.754364 -1.095 0.27362
## genhlth_ex 2.567698 0.991537 2.590 0.00965 **
## genhlth_go 0.852631 0.819748 1.040 0.29835
## genhlth_fa 2.769813 1.057703 2.619 0.00886 **
## genhlth_po 14.080355 1.390573 10.126 < 2e-16 ***
## qlactlm 4.097882 0.716637 5.718 1.16e-08 ***
## pm2.5 0.001368 0.018527 0.074 0.94116
## ozone 14.239468 7.708722 1.847 0.06480 .
## greenness 1.490424 0.344856 4.322 1.59e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3780 degrees of freedom
## (13078 observations deleted due to missingness)
## Multiple R-squared: 0.39, Adjusted R-squared: 0.3857
## F-statistic: 89.52 on 27 and 3780 DF, p-value: < 2.2e-16
delete bmi_cat1:
mod9 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod9)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 +
## income4 + income6 + income7 + married + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po +
## qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2694 -1.0571 -0.0408 0.9764 21.0710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.455543 1.524429 8.827 < 2e-16 ***
## sex 1.069038 0.651026 1.642 0.10066
## black 0.140336 0.149283 0.940 0.34725
## asian -0.370013 0.365148 -1.013 0.31097
## educ2 1.414777 0.501264 2.822 0.00479 **
## income1 -6.946582 1.172752 -5.923 3.44e-09 ***
## income4 -2.190794 0.864542 -2.534 0.01132 *
## income6 -1.144794 0.779848 -1.468 0.14220
## income7 -0.777008 0.810857 -0.958 0.33800
## married -1.199491 0.544492 -2.203 0.02766 *
## employed -3.107439 0.572029 -5.432 5.91e-08 ***
## out_of_work 5.386927 1.338340 4.025 5.81e-05 ***
## homemaker -2.718863 1.080457 -2.516 0.01190 *
## student -3.252090 2.418911 -1.344 0.17889
## hlthplan -3.417345 0.824205 -4.146 3.45e-05 ***
## exercise -1.400126 0.678067 -2.065 0.03900 *
## smoke 4.150608 0.729009 5.693 1.34e-08 ***
## drink -3.164315 0.433900 -7.293 3.68e-13 ***
## bmi_cat2 -0.524877 0.663958 -0.791 0.42927
## genhlth_ex 2.428255 0.977537 2.484 0.01303 *
## genhlth_go 0.934257 0.813950 1.148 0.25112
## genhlth_fa 2.867366 1.051282 2.727 0.00641 **
## genhlth_po 14.126455 1.389438 10.167 < 2e-16 ***
## qlactlm 4.128102 0.715708 5.768 8.67e-09 ***
## pm2.5 0.002069 0.018508 0.112 0.91100
## ozone 13.982930 7.702386 1.815 0.06954 .
## greenness 1.502176 0.344560 4.360 1.34e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3781 degrees of freedom
## (13078 observations deleted due to missingness)
## Multiple R-squared: 0.3899, Adjusted R-squared: 0.3857
## F-statistic: 92.94 on 26 and 3781 DF, p-value: < 2.2e-16
delete bmi_cat2:
mod10 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod10)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 +
## income4 + income6 + income7 + married + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm +
## pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2751 -1.0612 -0.0432 0.9703 20.9489
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.233353 1.498220 8.833 < 2e-16 ***
## sex 1.106615 0.649257 1.704 0.08838 .
## black 0.136771 0.149208 0.917 0.35939
## asian -0.370158 0.365130 -1.014 0.31076
## educ2 1.387587 0.500058 2.775 0.00555 **
## income1 -6.952568 1.172670 -5.929 3.32e-09 ***
## income4 -2.171941 0.864170 -2.513 0.01200 *
## income6 -1.159868 0.779577 -1.488 0.13688
## income7 -0.808770 0.809821 -0.999 0.31800
## married -1.232731 0.542839 -2.271 0.02321 *
## employed -3.080563 0.570990 -5.395 7.27e-08 ***
## out_of_work 5.427554 1.337287 4.059 5.04e-05 ***
## homemaker -2.704300 1.080246 -2.503 0.01234 *
## student -3.178348 2.416992 -1.315 0.18859
## hlthplan -3.393198 0.823598 -4.120 3.87e-05 ***
## exercise -1.416326 0.677723 -2.090 0.03670 *
## smoke 4.147400 0.728962 5.689 1.37e-08 ***
## drink -3.178133 0.433526 -7.331 2.78e-13 ***
## genhlth_ex 2.461532 0.976582 2.521 0.01176 *
## genhlth_go 0.955399 0.813471 1.174 0.24028
## genhlth_fa 2.891422 1.050789 2.752 0.00596 **
## genhlth_po 14.153748 1.388940 10.190 < 2e-16 ***
## qlactlm 4.154299 0.714905 5.811 6.72e-09 ***
## pm2.5 0.002191 0.018506 0.118 0.90575
## ozone 13.926352 7.701671 1.808 0.07065 .
## greenness 1.500980 0.344539 4.356 1.36e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3782 degrees of freedom
## (13078 observations deleted due to missingness)
## Multiple R-squared: 0.3898, Adjusted R-squared: 0.3858
## F-statistic: 96.65 on 25 and 3782 DF, p-value: < 2.2e-16
delete black:
mod11 = lm(menthlth ~ sex + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod11)
##
## Call:
## lm(formula = menthlth ~ sex + asian + educ2 + income1 + income4 +
## income6 + income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_ex +
## genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2626 -1.0407 -0.0406 0.9574 21.0823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.1316475 1.4555314 9.022 < 2e-16 ***
## sex 1.3204685 0.6324717 2.088 0.03688 *
## asian -0.1872403 0.2658969 -0.704 0.48136
## educ2 1.3279972 0.4858312 2.733 0.00630 **
## income1 -6.9002780 1.1488503 -6.006 2.07e-09 ***
## income4 -2.2420937 0.8440959 -2.656 0.00793 **
## income6 -1.0846417 0.7564290 -1.434 0.15168
## income7 -0.7967446 0.7859403 -1.014 0.31077
## married -1.2526395 0.5193240 -2.412 0.01591 *
## employed -3.1059396 0.5549723 -5.597 2.33e-08 ***
## out_of_work 5.3144316 1.3061909 4.069 4.82e-05 ***
## homemaker -2.8937121 1.0514939 -2.752 0.00595 **
## student -3.7268272 2.2898653 -1.628 0.10370
## hlthplan -3.4181576 0.8010686 -4.267 2.03e-05 ***
## exercise -1.3927426 0.6581059 -2.116 0.03438 *
## smoke 4.3199347 0.7089008 6.094 1.21e-09 ***
## drink -3.2331399 0.4193523 -7.710 1.58e-14 ***
## genhlth_ex 2.4471046 0.9505498 2.574 0.01008 *
## genhlth_go 1.0592587 0.7930507 1.336 0.18173
## genhlth_fa 2.9364303 1.0255170 2.863 0.00421 **
## genhlth_po 14.1865856 1.3531702 10.484 < 2e-16 ***
## qlactlm 3.9966302 0.6900912 5.791 7.52e-09 ***
## pm2.5 -0.0008375 0.0176943 -0.047 0.96225
## ozone 15.2872421 7.3472445 2.081 0.03753 *
## greenness 1.5439223 0.3303762 4.673 3.06e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.151 on 3950 degrees of freedom
## (12911 observations deleted due to missingness)
## Multiple R-squared: 0.3987, Adjusted R-squared: 0.395
## F-statistic: 109.1 on 24 and 3950 DF, p-value: < 2.2e-16
delete asian:
mod12 = lm(menthlth ~ sex + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod12)
##
## Call:
## lm(formula = menthlth ~ sex + educ2 + income1 + income4 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_ex +
## genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.6846 -1.2947 -0.0493 1.1526 22.6382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.255664 0.916346 11.192 < 2e-16 ***
## sex -0.061897 0.406604 -0.152 0.879010
## educ2 0.571854 0.311227 1.837 0.066174 .
## income1 -0.750505 0.713574 -1.052 0.292934
## income4 0.332985 0.551755 0.604 0.546187
## income6 0.400030 0.476289 0.840 0.400988
## income7 -0.874572 0.489511 -1.787 0.074025 .
## married -0.122057 0.344987 -0.354 0.723493
## employed -1.831612 0.367889 -4.979 6.49e-07 ***
## out_of_work 4.015560 0.887239 4.526 6.07e-06 ***
## homemaker -1.301440 0.681900 -1.909 0.056345 .
## student -7.871992 1.356813 -5.802 6.73e-09 ***
## hlthplan -1.802891 0.517532 -3.484 0.000497 ***
## exercise -2.011671 0.408545 -4.924 8.60e-07 ***
## smoke 6.260483 0.438637 14.273 < 2e-16 ***
## drink -2.939469 0.274782 -10.697 < 2e-16 ***
## genhlth_ex 0.993972 0.587334 1.692 0.090608 .
## genhlth_go -0.012063 0.483617 -0.025 0.980101
## genhlth_fa 2.789457 0.631444 4.418 1.01e-05 ***
## genhlth_po 12.451268 0.836273 14.889 < 2e-16 ***
## qlactlm 2.479303 0.446270 5.556 2.83e-08 ***
## pm2.5 0.004621 0.013444 0.344 0.731040
## ozone 32.239021 5.646010 5.710 1.16e-08 ***
## greenness 2.889014 0.261178 11.061 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.768 on 11367 degrees of freedom
## (5495 observations deleted due to missingness)
## Multiple R-squared: 0.302, Adjusted R-squared: 0.3006
## F-statistic: 213.9 on 23 and 11367 DF, p-value: < 2.2e-16
delete sex:
mod13 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod13)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_ex +
## genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.6824 -1.2946 -0.0497 1.1532 22.6351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.206697 0.857994 11.896 < 2e-16 ***
## educ2 0.573939 0.310912 1.846 0.064921 .
## income1 -0.750924 0.713538 -1.052 0.292641
## income4 0.334825 0.551598 0.607 0.543857
## income6 0.402370 0.476021 0.845 0.397973
## income7 -0.872864 0.489361 -1.784 0.074502 .
## married -0.112655 0.339399 -0.332 0.739951
## employed -1.830517 0.367803 -4.977 6.56e-07 ***
## out_of_work 4.016963 0.887153 4.528 6.02e-06 ***
## homemaker -1.322031 0.668321 -1.978 0.047937 *
## student -7.871466 1.356751 -5.802 6.74e-09 ***
## hlthplan -1.805689 0.517184 -3.491 0.000482 ***
## exercise -2.007987 0.407810 -4.924 8.61e-07 ***
## smoke 6.262179 0.438477 14.282 < 2e-16 ***
## drink -2.932186 0.270573 -10.837 < 2e-16 ***
## genhlth_ex 0.992737 0.587253 1.690 0.090964 .
## genhlth_go -0.012455 0.483589 -0.026 0.979453
## genhlth_fa 2.791423 0.631285 4.422 9.88e-06 ***
## genhlth_po 12.454114 0.836028 14.897 < 2e-16 ***
## qlactlm 2.480392 0.446194 5.559 2.77e-08 ***
## pm2.5 0.004536 0.013431 0.338 0.735610
## ozone 32.281336 5.638920 5.725 1.06e-08 ***
## greenness 2.887478 0.260972 11.064 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.768 on 11368 degrees of freedom
## (5495 observations deleted due to missingness)
## Multiple R-squared: 0.302, Adjusted R-squared: 0.3007
## F-statistic: 223.6 on 22 and 11368 DF, p-value: < 2.2e-16
delete genhlth_go:
mod14 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod14)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_ex +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.680 -1.294 -0.050 1.153 22.634
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.197105 0.772912 13.193 < 2e-16 ***
## educ2 0.573085 0.309129 1.854 0.06378 .
## income1 -0.751498 0.713158 -1.054 0.29201
## income4 0.334921 0.551562 0.607 0.54372
## income6 0.402592 0.475922 0.846 0.39761
## income7 -0.872368 0.488961 -1.784 0.07443 .
## married -0.112239 0.338998 -0.331 0.74058
## employed -1.829920 0.367057 -4.985 6.27e-07 ***
## out_of_work 4.017215 0.887060 4.529 6.00e-06 ***
## homemaker -1.322044 0.668292 -1.978 0.04793 *
## student -7.871583 1.356683 -5.802 6.72e-09 ***
## hlthplan -1.804020 0.513087 -3.516 0.00044 ***
## exercise -2.007095 0.406319 -4.940 7.94e-07 ***
## smoke 6.261979 0.438389 14.284 < 2e-16 ***
## drink -2.931424 0.268940 -10.900 < 2e-16 ***
## genhlth_ex 0.999657 0.522162 1.914 0.05559 .
## genhlth_fa 2.798601 0.566406 4.941 7.88e-07 ***
## genhlth_po 12.461662 0.782942 15.916 < 2e-16 ***
## qlactlm 2.479634 0.445202 5.570 2.61e-08 ***
## pm2.5 0.004525 0.013425 0.337 0.73607
## ozone 32.290143 5.628296 5.737 9.88e-09 ***
## greenness 2.887963 0.260282 11.096 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.768 on 11369 degrees of freedom
## (5495 observations deleted due to missingness)
## Multiple R-squared: 0.302, Adjusted R-squared: 0.3007
## F-statistic: 234.3 on 21 and 11369 DF, p-value: < 2.2e-16
delete married:
mod15 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod15)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 +
## income7 + employed + out_of_work + homemaker + student +
## hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa +
## genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.6631 -1.2933 -0.0495 1.1533 22.6260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.150332 0.759862 13.358 < 2e-16 ***
## educ2 0.567198 0.308605 1.838 0.066097 .
## income1 -0.702147 0.697380 -1.007 0.314035
## income4 0.346595 0.550412 0.630 0.528902
## income6 0.392301 0.474887 0.826 0.408769
## income7 -0.897508 0.483010 -1.858 0.063173 .
## employed -1.842952 0.364926 -5.050 4.48e-07 ***
## out_of_work 4.028979 0.886313 4.546 5.53e-06 ***
## homemaker -1.361293 0.657668 -2.070 0.038486 *
## student -7.818007 1.346946 -5.804 6.64e-09 ***
## hlthplan -1.816531 0.511673 -3.550 0.000387 ***
## exercise -2.011937 0.406040 -4.955 7.34e-07 ***
## smoke 6.277095 0.435988 14.397 < 2e-16 ***
## drink -2.920737 0.266986 -10.940 < 2e-16 ***
## genhlth_ex 1.002233 0.522083 1.920 0.054923 .
## genhlth_fa 2.810562 0.565231 4.972 6.71e-07 ***
## genhlth_po 12.464606 0.782861 15.922 < 2e-16 ***
## qlactlm 2.477284 0.445128 5.565 2.68e-08 ***
## pm2.5 0.004938 0.013366 0.369 0.711816
## ozone 32.206553 5.622410 5.728 1.04e-08 ***
## greenness 2.886521 0.260235 11.092 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.767 on 11370 degrees of freedom
## (5495 observations deleted due to missingness)
## Multiple R-squared: 0.302, Adjusted R-squared: 0.3008
## F-statistic: 246 on 20 and 11370 DF, p-value: < 2.2e-16
delete income4:
mod16 = lm(menthlth ~ educ2 + income1 + income6 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod16)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income6 + income7 +
## employed + out_of_work + homemaker + student + hlthplan +
## exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po +
## qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.7490 -1.2902 -0.0484 1.1506 22.7061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.267050 0.742162 13.834 < 2e-16 ***
## educ2 0.602904 0.304090 1.983 0.047430 *
## income1 -0.792955 0.689181 -1.151 0.249930
## income6 0.349901 0.470356 0.744 0.456948
## income7 -0.987105 0.473388 -2.085 0.037074 *
## employed -1.863608 0.362637 -5.139 2.81e-07 ***
## out_of_work 4.022371 0.885837 4.541 5.66e-06 ***
## homemaker -1.390504 0.655896 -2.120 0.034027 *
## student -7.829242 1.343951 -5.826 5.85e-09 ***
## hlthplan -1.853019 0.509587 -3.636 0.000278 ***
## exercise -2.009024 0.405708 -4.952 7.45e-07 ***
## smoke 6.266625 0.435244 14.398 < 2e-16 ***
## drink -2.933354 0.266005 -11.027 < 2e-16 ***
## genhlth_ex 0.986282 0.521525 1.891 0.058630 .
## genhlth_fa 2.832592 0.564557 5.017 5.32e-07 ***
## genhlth_po 12.443779 0.782558 15.901 < 2e-16 ***
## qlactlm 2.479736 0.444978 5.573 2.57e-08 ***
## pm2.5 0.003508 0.013286 0.264 0.791767
## ozone 32.146947 5.615236 5.725 1.06e-08 ***
## greenness 2.888105 0.260095 11.104 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.767 on 11382 degrees of freedom
## (5484 observations deleted due to missingness)
## Multiple R-squared: 0.3019, Adjusted R-squared: 0.3008
## F-statistic: 259.1 on 19 and 11382 DF, p-value: < 2.2e-16
delete income6:
mod17 = lm(menthlth ~ educ2 + income1 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod17)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income7 + employed +
## out_of_work + homemaker + student + hlthplan + exercise +
## smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm +
## pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.782 -1.288 -0.048 1.154 22.693
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.313364 0.738347 13.968 < 2e-16 ***
## educ2 0.623898 0.302038 2.066 0.038886 *
## income1 -0.860746 0.683104 -1.260 0.207677
## income7 -1.048633 0.467090 -2.245 0.024785 *
## employed -1.847903 0.362151 -5.103 3.40e-07 ***
## out_of_work 3.990747 0.884935 4.510 6.56e-06 ***
## homemaker -1.402596 0.655731 -2.139 0.032459 *
## student -7.839802 1.343049 -5.837 5.45e-09 ***
## hlthplan -1.829015 0.508746 -3.595 0.000326 ***
## exercise -1.994564 0.405384 -4.920 8.77e-07 ***
## smoke 6.295352 0.433862 14.510 < 2e-16 ***
## drink -2.941625 0.265832 -11.066 < 2e-16 ***
## genhlth_ex 0.972403 0.521240 1.866 0.062129 .
## genhlth_fa 2.807990 0.563156 4.986 6.25e-07 ***
## genhlth_po 12.397304 0.780740 15.879 < 2e-16 ***
## qlactlm 2.484758 0.444737 5.587 2.36e-08 ***
## pm2.5 0.003098 0.013272 0.233 0.815439
## ozone 32.069417 5.611449 5.715 1.12e-08 ***
## greenness 2.876051 0.259553 11.081 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.767 on 11384 degrees of freedom
## (5483 observations deleted due to missingness)
## Multiple R-squared: 0.3019, Adjusted R-squared: 0.3008
## F-statistic: 273.5 on 18 and 11384 DF, p-value: < 2.2e-16
delete income1:
mod18 = lm(menthlth ~ educ2 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod18)
##
## Call:
## lm(formula = menthlth ~ educ2 + income7 + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.544 -1.304 -0.049 1.165 22.762
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.3135973 0.7240722 14.244 < 2e-16 ***
## educ2 0.6468932 0.3008295 2.150 0.031547 *
## income7 -0.9626900 0.4623028 -2.082 0.037330 *
## employed -1.8684210 0.3599958 -5.190 2.14e-07 ***
## out_of_work 3.9239208 0.8813191 4.452 8.57e-06 ***
## homemaker -1.4021883 0.6518621 -2.151 0.031493 *
## student -7.9603739 1.3276376 -5.996 2.08e-09 ***
## hlthplan -1.7880862 0.4993371 -3.581 0.000344 ***
## exercise -2.0982285 0.4033926 -5.201 2.01e-07 ***
## smoke 6.2490540 0.4319204 14.468 < 2e-16 ***
## drink -2.9527947 0.2640249 -11.184 < 2e-16 ***
## genhlth_ex 0.9570070 0.5187859 1.845 0.065106 .
## genhlth_fa 2.6284833 0.5539079 4.745 2.11e-06 ***
## genhlth_po 12.0485502 0.7658576 15.732 < 2e-16 ***
## qlactlm 2.4617039 0.4432345 5.554 2.85e-08 ***
## pm2.5 0.0007633 0.0132234 0.058 0.953971
## ozone 33.2546829 5.5915006 5.947 2.80e-09 ***
## greenness 2.9106861 0.2586182 11.255 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.769 on 11486 degrees of freedom
## (5382 observations deleted due to missingness)
## Multiple R-squared: 0.3036, Adjusted R-squared: 0.3025
## F-statistic: 294.5 on 17 and 11486 DF, p-value: < 2.2e-16
delete genhlth_ex:
mod19 = lm(menthlth ~ educ2 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod19)
##
## Call:
## lm(formula = menthlth ~ educ2 + income7 + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.5456 -1.3045 -0.0468 1.1641 22.5928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.454769 0.720140 14.518 < 2e-16 ***
## educ2 0.526837 0.293471 1.795 0.07265 .
## income7 -0.963099 0.462321 -2.083 0.03726 *
## employed -1.803972 0.358376 -5.034 4.88e-07 ***
## out_of_work 3.936364 0.880367 4.471 7.85e-06 ***
## homemaker -1.304757 0.649767 -2.008 0.04466 *
## student -7.804755 1.324278 -5.894 3.88e-09 ***
## hlthplan -1.832259 0.498734 -3.674 0.00024 ***
## exercise -2.018860 0.401007 -5.034 4.86e-07 ***
## smoke 6.191872 0.430883 14.370 < 2e-16 ***
## drink -2.891994 0.261827 -11.045 < 2e-16 ***
## genhlth_fa 2.503150 0.549823 4.553 5.35e-06 ***
## genhlth_po 11.963037 0.764544 15.647 < 2e-16 ***
## qlactlm 2.369105 0.440025 5.384 7.43e-08 ***
## pm2.5 -0.001208 0.013184 -0.092 0.92698
## ozone 34.062301 5.574115 6.111 1.02e-09 ***
## greenness 2.956187 0.257457 11.482 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.769 on 11488 degrees of freedom
## (5381 observations deleted due to missingness)
## Multiple R-squared: 0.3034, Adjusted R-squared: 0.3024
## F-statistic: 312.7 on 16 and 11488 DF, p-value: < 2.2e-16
delete educ2:
mod20 = lm(menthlth ~ income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod20)
##
## Call:
## lm(formula = menthlth ~ income7 + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_fa +
## genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.680 -1.299 -0.054 1.161 22.680
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.8950021 0.6771652 16.089 < 2e-16 ***
## income7 -0.9797097 0.4622734 -2.119 0.034084 *
## employed -1.8655613 0.3567650 -5.229 1.73e-07 ***
## out_of_work 3.9186037 0.8803965 4.451 8.63e-06 ***
## homemaker -1.2810520 0.6496960 -1.972 0.048660 *
## student -8.0383517 1.3179967 -6.099 1.10e-09 ***
## hlthplan -1.8849753 0.4979174 -3.786 0.000154 ***
## exercise -2.1674331 0.3924119 -5.523 3.40e-08 ***
## smoke 6.3684068 0.4195520 15.179 < 2e-16 ***
## drink -3.0081166 0.2537350 -11.855 < 2e-16 ***
## genhlth_fa 2.5707795 0.5485835 4.686 2.82e-06 ***
## genhlth_po 11.9444866 0.7645486 15.623 < 2e-16 ***
## qlactlm 2.3221602 0.4392893 5.286 1.27e-07 ***
## pm2.5 -0.0001499 0.0131717 -0.011 0.990923
## ozone 33.5504328 5.5673557 6.026 1.73e-09 ***
## greenness 2.9562341 0.2574818 11.481 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.769 on 11489 degrees of freedom
## (5381 observations deleted due to missingness)
## Multiple R-squared: 0.3032, Adjusted R-squared: 0.3023
## F-statistic: 333.2 on 15 and 11489 DF, p-value: < 2.2e-16
this is the final regression model considering the relationship:
if we delete ozone:
mod17 = lm(menthlth ~ black + educ2 + income1 + income4 + married + employed + out_of_work + homemaker + student + hlthplan + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5,data = reg_data)
summary(mod17)
##
## Call:
## lm(formula = menthlth ~ black + educ2 + income1 + income4 + married +
## employed + out_of_work + homemaker + student + hlthplan +
## smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm +
## pm2.5, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3218 -1.0317 -0.0736 0.9414 21.7154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.74686 0.93724 14.667 < 2e-16 ***
## black 0.37834 0.11252 3.363 0.000778 ***
## educ2 1.53540 0.42476 3.615 0.000304 ***
## income1 -5.86823 1.01930 -5.757 9.09e-09 ***
## income4 -1.72439 0.76284 -2.260 0.023836 *
## married -1.03376 0.44919 -2.301 0.021414 *
## employed -3.26315 0.49272 -6.623 3.92e-11 ***
## out_of_work 4.40985 1.18811 3.712 0.000208 ***
## homemaker -2.56811 0.92069 -2.789 0.005303 **
## student -4.62055 1.98946 -2.323 0.020247 *
## hlthplan -3.08556 0.71042 -4.343 1.43e-05 ***
## smoke 5.32836 0.61954 8.601 < 2e-16 ***
## drink -3.83040 0.35301 -10.851 < 2e-16 ***
## genhlth_ex 2.15958 0.74362 2.904 0.003700 **
## genhlth_fa 2.72990 0.81743 3.340 0.000845 ***
## genhlth_po 15.63954 1.13876 13.734 < 2e-16 ***
## qlactlm 3.72105 0.63389 5.870 4.65e-09 ***
## pm2.5 0.03442 0.01414 2.434 0.014984 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.069 on 4777 degrees of freedom
## (12091 observations deleted due to missingness)
## Multiple R-squared: 0.4007, Adjusted R-squared: 0.3986
## F-statistic: 187.9 on 17 and 4777 DF, p-value: < 2.2e-16
aggregate into state level:
st.codes<-data.frame(
state=c(1,2,4,5,6,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72),full=c("alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia","florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky","louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana", "nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio","oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah","vermont","virginia","washington","west virginia","wisconsin","wyoming","puerto rico"))
write state level data for 2000:
library("SASxport")
library(dplyr)
data_00 <-read.xport("CDBRFS00.XPT")
data <- data_00 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFAR,CVDCORHD,CVDSTROK,ASTHMA,AGE,SEX,ORACE,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY,X.RFSMOK2,DRINKANY,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLMT)
NA
data$CTYCODE[data$CTYCODE %in% c(777,999)] <- c(NA)
data$MENTHLTH[data$MENTHLTH %in% c(77,88,99)] <- c(NA)
data$CVDINFAR[data$CVDINFAR %in% c(7,9)] <- c(NA)
data$CVDCORHD[data$CVDCORHD %in% c(7,9)] <- c(NA)
data$CVDSTROK[data$CVDSTROK %in% c(7,9)] <- c(NA)
data$ASTHMA[data$ASTHMA %in% c(7,9)] <- c(NA)
data$AGE[data$AGE %in% c(7,9)] <- c(NA)
data$ORACE[data$ORACE %in% c(7,9)] <- c(NA)
data$EDUCA[data$EDUCA == 9] <- c(NA)
data$INCOME2[data$INCOME2 %in% c(77,99)] <- c(NA)
data$MARITAL[data$MARITAL == 9] <- c(NA)
data$EMPLOY[data$EMPLOY == 9] <- c(NA)
data$HLTHPLAN[data$HLTHPLAN %in% c(7,9)] <- c(NA)
data$EXERANY[data$EXERANY %in% c(7,9)] <- c(NA)
data$X.RFSMOK2[data$X.RFSMOK2 == 9] <- c(NA)
data$DRINKANY[data$DRINKANY %in% c(7,9)] <- c(NA)
data$X.BMI2CAT[data$X.BMI2CAT == 9] <- c(NA)
data$X.BMI2[data$X.BMI2 == 999] <- c(NA)
data$GENHLTH[data$GENHLTH %in% c(7,9)] <- c(NA)
data$QLACTLM2[data$QLACTLMT %in% c(7,9)] <- c(NA)
SELECT
data00 <-data %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2000,
menthlth = mean(MENTHLTH, na.rm= TRUE),
heartattack = mean(CVDINFAR == 1, na.rm = TRUE),
ACheartdis = mean(CVDCORHD == 1, na.rm = TRUE),
stroke = mean(CVDSTROK == 1, na.rm = TRUE),
asthma = mean(ASTHMA == 1, na.rm = TRUE),
age = mean(AGE, na.rm = TRUE),
sex = mean(SEX == 2, na.rm = TRUE),
white = mean(ORACE == 1, na.rm = TRUE),
black = mean(ORACE == 2, na.rm = TRUE),
asian = mean(ORACE == 3, na.rm = TRUE),
race_other = mean(ORACE == 4 | ORACE == 5 | ORACE == 6, na.rm = TRUE),
educ1 = mean(EDUCA == 1|EDUCA == 2, na.rm = TRUE),
educ2 = mean(EDUCA == 3|EDUCA == 4, na.rm = TRUE),
educ3 = mean(EDUCA == 5|EDUCA == 6, na.rm = TRUE),
income1 = mean(INCOME2 == 1, na.rm = TRUE),
income2 = mean(INCOME2 == 2, na.rm = TRUE),
income3 = mean(INCOME2 == 3, na.rm = TRUE),
income4 = mean(INCOME2 == 4, na.rm = TRUE),
income5 = mean(INCOME2 == 5, na.rm = TRUE),
income6 = mean(INCOME2 == 6, na.rm = TRUE),
income7 = mean(INCOME2 == 7, na.rm = TRUE),
income8 = mean(INCOME2 == 8, na.rm = TRUE),
married = mean(MARITAL == 1, na.rm = TRUE),
unmarried = mean(MARITAL == 2| MARITAL == 3| MARITAL == 4| MARITAL == 5| MARITAL == 6, na.rm = TRUE),
employed = mean(EMPLOY == 1 | EMPLOY == 2, na.rm = TRUE),
out_of_work = mean(EMPLOY ==3 | EMPLOY ==4, na.rm = TRUE),
homemaker = mean(EMPLOY ==5, na.rm = TRUE),
student = mean(EMPLOY == 6, na.rm = TRUE),
employ_other = mean(EMPLOY ==7 | EMPLOY ==8, na.rm = TRUE),
hlthplan = mean(HLTHPLAN == 1, na.rm = TRUE),
exercise = mean(EXERANY == 1, na.rm = TRUE),
smoke = mean(X.RFSMOK2 == 2, na.rm = TRUE),
drink = mean(DRINKANY == 1, na.rm = TRUE),
bmi_cat1 = mean(X.BMI2CAT == 1, na.rm =TRUE),
bmi_cat2 = mean(X.BMI2CAT == 2, na.rm = TRUE),
bmi_cat3 = mean(X.BMI2CAT == 3, na.rm = TRUE),
bmi_cts = mean(X.BMI2,na.rm = TRUE),
genhlth_ex = mean(GENHLTH == 1, na.rm = TRUE),
genhlth_vg = mean(GENHLTH == 2, na.rm = TRUE),
genhlth_go = mean(GENHLTH == 3, na.rm = TRUE),
genhlth_fa = mean(GENHLTH == 4, na.rm = TRUE),
genhlth_po = mean(GENHLTH == 5, na.rm = TRUE),
qlactlm = mean(QLACTLMT == 1, na.rm = TRUE))
colnames(data00)[1] = "state"
data00 = data00 %>%
left_join(st.codes,by = c("state"="state"))
write.csv(data00, file = "state_00.csv")
Mental health patterns in 2000 for all US states in a map:
library(fiftystater)
library(RColorBrewer)
library(mapproj)
data("fifty_states")
data00 = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
## state = col_integer(),
## year = col_double(),
## menthlth = col_double(),
## full = col_character()
## )
state_00 = data00 %>%
dplyr::select(full,menthlth)
max(state_00$menthlth)
## [1] NaN
min(state_00$menthlth)
## [1] NaN
ggplot(state_00,aes(map_id = full)) +
geom_map(aes(fill = menthlth),map = fifty_states) +
expand_limits(x = fifty_states$long,y = fifty_states$lat) +
coord_map() +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(9,"RdPu"))(5)),
breaks = seq(7,18,,length.out = 5),
limits = c(7,18),
labels = seq(7,18,length.out = 5)) +
ylab("") +
xlab("")
Select average menthlth for each state across years and write it into a csv file, and then plot mental health across states:
library(SASxport)
data_00 <-read.xport("CDBRFS00.XPT")
data_00 <- data_00 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFAR,CVDCORHD,CVDSTROK,ASTHMA,AGE,SEX,ORACE,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY,X.RFSMOK2,DRINKANY,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLMT)
men_00_state = data_00 %>%
mutate(year = 2000) %>%
select(X.STATE,MENTHLTH,year)
men_00_state$MENTHLTH[men_00_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_00_state <- men_00_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2000,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_01 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS01.XPT")
data_01 <- data_01 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLM2)
men_01_state = data_01 %>%
mutate(year = 2001) %>%
select(X.STATE,MENTHLTH,year)
men_01_state$MENTHLTH[men_01_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_01_state <- men_01_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2001,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_02 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/cdbrfs02.xpt")
data_02 <- data_02 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLM2)
men_02_state = data_02 %>%
mutate(year = 2002) %>%
select(X.STATE,MENTHLTH,year)
men_02_state$MENTHLTH[men_02_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_02_state <- men_02_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2002,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_03 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/cdbrfs03.xpt")
data_03 <- data_03 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI3CAT,X.BMI3,GENHLTH,QLACTLM2)
men_03_state = data_03 %>%
mutate(year = 2003) %>%
select(X.STATE,MENTHLTH,year)
men_03_state$MENTHLTH[men_03_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_03_state <- men_03_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2003,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_04 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS04.XPT")
data_04 <- data_04 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)
men_04_state = data_04 %>%
mutate(year = 2004) %>%
select(X.STATE,MENTHLTH,year)
men_04_state$MENTHLTH[men_04_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_04_state <- men_04_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2004,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_05 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS05.XPT")
data_05 <- data_05 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR3,CVDCRHD3,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)
men_05_state = data_05 %>%
mutate(year = 2005) %>%
select(X.STATE,MENTHLTH,year)
men_05_state$MENTHLTH[men_05_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_05_state <- men_05_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2005,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_06 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS06.XPT")
data_06 <- data_06 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR3,CVDCRHD3,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)
men_06_state = data_06 %>%
mutate(year = 2006) %>%
select(X.STATE,MENTHLTH,year)
men_06_state$MENTHLTH[men_06_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_06_state <- men_06_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2006,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_07 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS07.XPT")
data_07 <- data_07 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips,X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR4,CVDCRHD4,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)
men_07_state = data_07 %>%
mutate(year = 2007) %>%
select(X.STATE,MENTHLTH,year)
men_07_state$MENTHLTH[men_07_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_07_state <- men_07_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2007,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_08 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS08.XPT")
data_08 <- data_08 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips,X.STATE, CTYCODE, MSCODE,IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)
men_08_state = data_08 %>%
mutate(year = 2008) %>%
select(X.STATE,MENTHLTH,year)
men_08_state$MENTHLTH[men_08_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_08_state <- men_08_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2008,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_09 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS09.XPT")
data_09 <- data_09 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips,X.STATE, CTYCODE, MSCODE,IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)
men_09_state = data_09 %>%
mutate(year = 2009) %>%
select(X.STATE,MENTHLTH,year)
men_09_state$MENTHLTH[men_09_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_09_state <- men_09_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2009,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_10 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS10.XPT")
data_10 <- data_10 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips,X.STATE, CTYCODE, IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)
men_10_state = data_10 %>%
mutate(year = 2010) %>%
select(X.STATE,MENTHLTH,year)
men_10_state$MENTHLTH[men_10_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_10_state <- men_10_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2010,
menthlth = mean(MENTHLTH, na.rm= TRUE))
plot all years together:
men_state = rbind(men_00_state,men_01_state,men_02_state,men_03_state,men_04_state,men_05_state,men_06_state,men_07_state,men_08_state,men_09_state,men_10_state)
colnames(men_state)[1] = "state"
men_state = men_state %>%
left_join(st.codes,by = c("state"="state"))
write_csv(men_state,"state_menthlth.csv")
library(fiftystater)
library(RColorBrewer)
library(mapproj)
data("fifty_states")
men_state = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
## state = col_integer(),
## year = col_double(),
## menthlth = col_double(),
## full = col_character()
## )
state_map = men_state %>%
dplyr::select(full,menthlth,year)
max(state_map$menthlth,na.rm = T)
## [1] 18.61704
min(state_map$menthlth,na.rm = T)
## [1] 7.455927
ggplot(state_map,aes(map_id = full)) +
geom_map(aes(fill = menthlth),map = fifty_states) +
expand_limits(x = fifty_states$long,y = fifty_states$lat) +
coord_map() +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(9,"RdPu"))(5)),
breaks = seq(7,19,,length.out = 5),
limits = c(7,19),
labels = seq(7,19,length.out = 5)) +
ylab("") +
xlab("") +
facet_wrap(~year,ncol = 3) +
ggtitle("Number of Days Mental Health Not Good for each US state from 2000 to 2010")
Run regression for different years and get a univariate plot each year:
library(readr)
library(dplyr)
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
## map(): purrr, maps
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer(),
## year = col_integer()
## )
## See spec(...) for full column specifications.
group_men_fit = data_all %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(pm2.5, menthlth)) +
geom_point(alpha = 0.2,col = "dodgerblue1") +
geom_smooth(method = "lm",col = "black") +
facet_wrap(~ year) +
ggtitle("mental health vs pm 2.5")
group_men_fit
group_men_fit1 = data_all %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(menthlth,ozone)) +
geom_point() +
geom_smooth(method = "lm") +
facet_wrap(~ year) +
ggtitle("mental health vs ozone")
group_men_fit1
univariate analysis for menthlth and pm 2.5:
data_all %>%
group_by(year) %>%
do(tidy(lm(menthlth ~ pm2.5, data = .),conf.int = TRUE)) %>%
filter(!grepl("Intercept", term))
## # A tibble: 11 x 8
## # Groups: year [11]
## year term estimate std.error statistic p.value conf.low
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2000 pm2.5 0.2509847 0.03115326 8.056450 3.212205e-15 0.18982374
## 2 2001 pm2.5 0.2349882 0.03506758 6.701009 3.889138e-11 0.16615350
## 3 2002 pm2.5 0.4076348 0.05853970 6.963390 1.331356e-11 0.29255856
## 4 2003 pm2.5 0.1898703 0.03484784 5.448552 6.337765e-08 0.12149025
## 5 2004 pm2.5 0.2286335 0.02852119 8.016269 2.633883e-15 0.17267485
## 6 2005 pm2.5 0.2030903 0.02435099 8.340126 1.957300e-16 0.15531651
## 7 2006 pm2.5 0.2991832 0.03221822 9.286151 7.911323e-20 0.23596916
## 8 2007 pm2.5 0.3296625 0.03427715 9.617557 1.777878e-21 0.26244336
## 9 2008 pm2.5 0.2109055 0.04081043 5.167931 2.579768e-07 0.13087465
## 10 2009 pm2.5 0.1792752 0.05146552 3.483405 5.047276e-04 0.07834929
## 11 2010 pm2.5 0.2650898 0.03630722 7.301296 3.960809e-13 0.19388981
## # ... with 1 more variables: conf.high <dbl>
heatmap for menthlth across US states from 2000 to 2010:
library(RColorBrewer)
men_state = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
## state = col_integer(),
## year = col_double(),
## menthlth = col_double(),
## full = col_character()
## )
men_state %>% filter(!is.na(menthlth) & !is.na(full)) %>%
ggplot(aes(x = year, y = full,fill = menthlth)) +
geom_tile(color = "grey50") +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "YlOrRd"), trans = "sqrt") +
ylab("state") +
xlab("year") +
ggtitle("Heatmap of average menthlth in US states from 2000 to 2010")
deal with raw ndvi data and study the relationship between greenness and menthlth:
data_all = read_csv("data_exposure_00_10.csv")
ndvi_county = ndvi %>%
group_by(FIPS,year) %>%
summarise(greenness = mean(ndvimean_combined))
data_all = data_all %>%
left_join(ndvi_county,by=c("fips"="FIPS","year"="year"))
write_csv(data_all,"data_3_expo.csv")
data_all = read_csv("~/Desktop/Harvard/fall 2017/bst260/project/data_3_expo.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer()
## )
## See spec(...) for full column specifications.
group_men_fit2 = data_all %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(menthlth,greenness)) +
geom_point() +
geom_smooth(method = "lm") +
facet_wrap(~ year)
group_men_fit2
Draw the Pearson correlation map:
data = read_csv("~/Desktop/Harvard/fall 2017/bst260/project/data_3_expo.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer()
## )
## See spec(...) for full column specifications.
data = data %>%
dplyr::select(income7,employed,out_of_work,homemaker,student,hlthplan,exercise,smoke, drink,genhlth_fa ,genhlth_po ,qlactlm,pm2.5 ,ozone,greenness)
data = data[complete.cases(data),]
cormat <- round(cor(data),2)
head(cormat)
## income7 employed out_of_work homemaker student hlthplan
## income7 1.00 0.31 -0.11 -0.05 0.04 0.25
## employed 0.31 1.00 -0.25 -0.26 0.07 0.17
## out_of_work -0.11 -0.25 1.00 -0.07 -0.03 -0.21
## homemaker -0.05 -0.26 -0.07 1.00 -0.02 -0.14
## student 0.04 0.07 -0.03 -0.02 1.00 -0.10
## hlthplan 0.25 0.17 -0.21 -0.14 -0.10 1.00
## exercise smoke drink genhlth_fa genhlth_po qlactlm pm2.5 ozone
## income7 0.26 -0.13 0.29 -0.24 -0.28 -0.22 -0.05 0.02
## employed 0.42 -0.12 0.51 -0.43 -0.49 -0.53 -0.05 0.09
## out_of_work -0.10 0.09 -0.07 0.11 0.08 0.09 0.01 -0.06
## homemaker -0.10 0.03 -0.21 0.08 0.09 0.04 0.07 0.13
## student 0.14 0.04 0.06 -0.08 -0.09 -0.14 0.07 0.08
## hlthplan 0.25 -0.29 0.34 -0.26 -0.27 -0.14 0.03 -0.05
## greenness
## income7 -0.13
## employed -0.25
## out_of_work 0.07
## homemaker -0.03
## student -0.05
## hlthplan -0.04
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
melted_cormat <- melt(cormat)
head(melted_cormat)
## Var1 Var2 value
## 1 income7 income7 1.00
## 2 employed income7 0.31
## 3 out_of_work income7 -0.11
## 4 homemaker income7 -0.05
## 5 student income7 0.04
## 6 hlthplan income7 0.25
library(ggplot2)
ggplot(data = melted_cormat, aes(x=Var1, y=Var2, fill=value)) +
geom_tile()
get_lower_tri<-function(cormat){
cormat[upper.tri(cormat)] <- NA
return(cormat)
}
# Get upper triangle of the correlation matrix
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
upper_tri <- get_upper_tri(cormat)
upper_tri
## income7 employed out_of_work homemaker student hlthplan
## income7 1 0.31 -0.11 -0.05 0.04 0.25
## employed NA 1.00 -0.25 -0.26 0.07 0.17
## out_of_work NA NA 1.00 -0.07 -0.03 -0.21
## homemaker NA NA NA 1.00 -0.02 -0.14
## student NA NA NA NA 1.00 -0.10
## hlthplan NA NA NA NA NA 1.00
## exercise NA NA NA NA NA NA
## smoke NA NA NA NA NA NA
## drink NA NA NA NA NA NA
## genhlth_fa NA NA NA NA NA NA
## genhlth_po NA NA NA NA NA NA
## qlactlm NA NA NA NA NA NA
## pm2.5 NA NA NA NA NA NA
## ozone NA NA NA NA NA NA
## greenness NA NA NA NA NA NA
## exercise smoke drink genhlth_fa genhlth_po qlactlm pm2.5 ozone
## income7 0.26 -0.13 0.29 -0.24 -0.28 -0.22 -0.05 0.02
## employed 0.42 -0.12 0.51 -0.43 -0.49 -0.53 -0.05 0.09
## out_of_work -0.10 0.09 -0.07 0.11 0.08 0.09 0.01 -0.06
## homemaker -0.10 0.03 -0.21 0.08 0.09 0.04 0.07 0.13
## student 0.14 0.04 0.06 -0.08 -0.09 -0.14 0.07 0.08
## hlthplan 0.25 -0.29 0.34 -0.26 -0.27 -0.14 0.03 -0.05
## exercise 1.00 -0.31 0.58 -0.47 -0.48 -0.39 -0.20 -0.01
## smoke NA 1.00 -0.24 0.24 0.28 0.21 0.17 0.09
## drink NA NA 1.00 -0.48 -0.54 -0.40 -0.21 -0.13
## genhlth_fa NA NA NA 1.00 0.25 0.40 0.12 -0.01
## genhlth_po NA NA NA NA 1.00 0.51 0.14 0.03
## qlactlm NA NA NA NA NA 1.00 0.00 -0.06
## pm2.5 NA NA NA NA NA NA 1.00 0.24
## ozone NA NA NA NA NA NA NA 1.00
## greenness NA NA NA NA NA NA NA NA
## greenness
## income7 -0.13
## employed -0.25
## out_of_work 0.07
## homemaker -0.03
## student -0.05
## hlthplan -0.04
## exercise -0.18
## smoke 0.16
## drink -0.25
## genhlth_fa 0.17
## genhlth_po 0.24
## qlactlm 0.20
## pm2.5 0.36
## ozone -0.27
## greenness 1.00
# Melt the correlation matrix
library(reshape2)
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Heatmap
library(ggplot2)
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
reorder_cormat <- function(cormat){
# Use correlation between variables as distance
dd <- as.dist((1-cormat)/2)
hc <- hclust(dd)
cormat <-cormat[hc$order, hc$order]
}
# Reorder the correlation matrix
cormat <- reorder_cormat(cormat)
upper_tri <- get_upper_tri(cormat)
# Melt the correlation matrix
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Create a ggheatmap
ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+ # minimal theme
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
# Print the heatmap
print(ggheatmap)
ggheatmap +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.7),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
try to draw the fitted line vs actual value in 2010: firstly I want to label each state but it turns out there are too much so I give up
library(ggrepel)
library(dplyr)
ds_theme_set()
data_all = read.csv("data_3_expo.csv")
st.codes<-data.frame(state=as.factor(c("AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA","HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME","MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM","NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY")),
full=as.factor(c("alaska","alabama","arkansas","arizona","california","colorado", "connecticut","district of columbia","delaware","florida","georgia","hawaii","iowa","idaho","illinois","indiana","kansas","kentucky","louisiana","massachusetts","maryland","maine","michigan","minnesota","missouri","mississippi","montana","north carolina","north dakota","nebraska","new hampshire","new jersey","new mexico","nevada","new york","ohio","oklahoma","oregon","pennsylvania","puerto rico","rhode island","south carolina","south dakota","tennessee","texas","utah","virginia","vermont","washington","wisconsin","west virginia","wyoming")))
highlight <- c("AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA","HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME","MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM","NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY")
fips_data = county.fips %>%
separate(polyname, c("state", "county"), ",")
data_all = data_all %>%
left_join(fips_data,by = c("fips"="fips")) %>%
left_join(st.codes,by = c("state.y" = "full"))
data_all_10 = data_all %>%
dplyr::filter(year == 2010) %>%
mutate(menthlth_hat = predict(mod20, newdata = .))
data_all_10 %>%
ggplot(aes(menthlth_hat,menthlth,col = state,label = state)) +
geom_point(show.legend = F) +
geom_abline() +
ggtitle("fitted values vs actual values of menthlth in 2010")
Univariate analysis for income vs asthma:
library(readr)
library(dplyr)
library(tidyverse)
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer(),
## year = col_integer()
## )
## See spec(...) for full column specifications.
group_as_fit = data_all %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(income1,asthma)) +
geom_point(alpha = 0.2,col = "darkorchid2") +
geom_smooth(method = "lm",col = "black") +
facet_wrap(~ year) +
ggtitle("income1 vs asthma") +
xlim(0,0.75) +
ylim(0,0.5)
group_as_fit
Univariate analysis for heartattack vs smoke:
library(readr)
library(dplyr)
library(tidyverse)
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer(),
## year = col_integer()
## )
## See spec(...) for full column specifications.
group_he_fit = data_all %>%
filter(year %in% c(2005,2006,2007,2008,2009,2010)) %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(smoke,heartattack)) +
geom_point(alpha = 0.2,col = "indianred1") +
geom_smooth(method = "lm",col = "black") +
facet_wrap(~ year) +
ggtitle("smoke vs heartattack") +
xlim(0,0.6) +
ylim(0,0.5)
group_he_fit